In [2]:
import re
import json
from bs4 import BeautifulSoup
import requests
In [18]:
# Input: the "Hyperlinks to company disclosure" column of the spreadsheet,
# exported to a local HTML file.
# NOTE(review): this is an absolute, machine-specific path.
with open('/Users/caged/Desktop/Untitled 1.html') as html_file:
    page = html_file.read()
In [16]:
# Parse the exported spreadsheet HTML using the 'html.parser' backend.
soup = BeautifulSoup(page, features='html.parser')
In [17]:
# One record per <a> tag: the link text is kept as the company name and the
# href attribute (None when the tag has no href) as the link target.
companies = [
    dict(name=anchor.text, href=anchor.get('href'))
    for anchor in soup.find_all('a')
]
companies
Out[17]:
[{'href': 'https://www.sec.gov/Archives/edgar/data/12927/000001292717000032/0000012927-17-000032-index.htm',
'name': 'Boeing\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/936468/000119312517176575/0001193125-17-176575-index.htm',
'name': 'Lockheed Martin'},
{'href': 'https://www.sec.gov/Archives/edgar/data/101829/000010182917000020/0000101829-17-000020-index.htm',
'name': 'United Technologies'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1047122/000104712217000092/0001047122-17-000092-index.htm',
'name': 'Raytheon'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1133421/000113342117000027/0001133421-17-000027-index.htm',
'name': 'Northrop Grumman'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1137411/000113741117000078/0001137411-17-000078-index.htm',
'name': 'Rockwell Collins, Inc.'},
{'href': 'https://www.sec.gov/Archives/edgar/data/40533/000119312517188658/0001193125-17-188658-index.htm',
'name': 'General Dynamics'},
{'href': 'https://www.sec.gov/Archives/edgar/data/95029/000117494717000940/0001174947-17-000940-index.htm',
'name': 'Sturm Ruger & Co'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1092796/000119312517189353/0001193125-17-189353-index.htm',
'name': 'American Outdoor Brands\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/789019/000119312517189445/0001193125-17-189445-index.htm',
'name': 'Microsoft'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1645590/000162828017006024/0001628280-17-006024-index.htm',
'name': 'Hewlett Packard'},
{'href': 'https://www.sec.gov/Archives/edgar/data/51143/000110465917035702/0001104659-17-035702-index.htm',
'name': 'IBM'},
{'href': 'https://www.sec.gov/Archives/edgar/data/896878/000089687817000050/0000896878-17-000050-index.htm',
'name': 'Intuit'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1048695/000104869517000018/0001048695-17-000018-index.htm',
'name': 'F5 Networks'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1262039/000119312517187503/0001193125-17-187503-index.htm',
'name': 'Fortinet'},
{'href': 'https://www.sec.gov/Archives/edgar/data/813672/000119312517189442/0001193125-17-189442-index.htm',
'name': 'Cadence Design Systems'},
{'href': 'https://www.sec.gov/Archives/edgar/data/849399/000119312517187428/0001193125-17-187428-index.htm',
'name': 'Symantec'},
{'href': 'https://www.sec.gov/Archives/edgar/data/769397/000076939717000032/0000769397-17-000032-index.htm',
'name': 'Autodesk'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1341439/000119312517189252/0001193125-17-189252-index.htm',
'name': 'Oracle'},
{'href': 'https://www.sec.gov/Archives/edgar/data/796343/000079634317000117/0000796343-17-000117-index.htm',
'name': 'Adobe Systems'},
{'href': 'https://www.sec.gov/Archives/edgar/data/37996/000003799617000051/0000037996-17-000051-index.htm',
'name': 'Ford Motor'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1318605/000156459017011856/0001564590-17-011856-index.htm',
'name': 'Tesla\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1467858/000146785817000080/0001467858-17-000080-index.htm',
'name': 'General Motors'},
{'href': 'https://www.sec.gov/Archives/edgar/data/715153/000119312517186215/0001193125-17-186215-index.htm',
'name': 'Honda Motor'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1094517/000119312517188197/0001193125-17-188197-index.htm',
'name': 'Toyota Motor'},
{'href': 'https://www.sec.gov/Archives/edgar/data/926042/000119312517188212/0001193125-17-188212-index.htm',
'name': 'Tata Motors'},
{'href': 'https://www.sec.gov/Archives/edgar/data/833444/000083344417000020/0000833444-17-000020-index.htm',
'name': 'Johnson Controls'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1521332/000152133217000037/0001521332-17-000037-index.htm',
'name': 'Delphi Automotive'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1034670/000119312517189214/0001193125-17-189214-index.htm',
'name': 'Autoliv'},
{'href': 'https://www.sec.gov/Archives/edgar/data/749098/000110465917036451/0001104659-17-036451-index.htm',
'name': 'Magna International'},
{'href': 'https://www.sec.gov/Archives/edgar/data/842162/000084216217000016/0000842162-17-000016-index.htm',
'name': 'Lear'},
{'href': 'https://www.sec.gov/Archives/edgar/data/908255/000090825517000028/0000908255-17-000028-index.htm',
'name': 'BorgWarner'},
{'href': 'https://www.sec.gov/Archives/edgar/data/62996/000006299617000026/0000062996-17-000026-index.htm',
'name': 'Masco'},
{'href': 'https://www.sec.gov/Archives/edgar/data/67716/000006771617000052/0000067716-17-000052-index.htm',
'name': 'MDU Resources Group'},
{'href': 'https://www.sec.gov/Archives/edgar/data/849395/000119312517188479/0001193125-17-188479-index.htm',
'name': 'CRH'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1370946/000119312517188419/0001193125-17-188419-index.htm',
'name': 'Owens Corning'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1159152/000119312517188580/0001193125-17-188580-index.htm',
'name': 'James Hardie Industries'},
{'href': 'https://www.sec.gov/Archives/edgar/data/757011/000075701117000042/0000757011-17-000042-index.htm',
'name': 'USG'},
{'href': 'https://www.sec.gov/Archives/edgar/data/883980/000110465917036578/0001104659-17-036578-index.htm',
'name': 'First Data'},
{'href': 'https://www.sec.gov/Archives/edgar/data/723254/000072325417000016/0000723254-17-000016-index.htm',
'name': 'Cintas'},
{'href': 'https://www.sec.gov/Archives/edgar/data/714603/000071460317000032/0000714603-17-000032-index.htm',
'name': 'DST Systems'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1492633/000119312517179675/0001193125-17-179675-index.htm',
'name': 'Nielsen Holdings'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1443646/000144364617000064/0001443646-17-000064-index.htm',
'name': 'Booz Allen Hamilton\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1101215/000110121517000122/0001101215-17-000122-index.htm',
'name': 'Alliance Data Systems'},
{'href': 'https://www.sec.gov/Archives/edgar/data/31462/000155837017004608/0001558370-17-004608-index.htm',
'name': 'Ecolab'},
{'href': 'https://www.sec.gov/Archives/edgar/data/89800/000119312517187554/0001193125-17-187554-index.htm',
'name': 'Sherwin-Williams'},
{'href': 'https://www.sec.gov/Archives/edgar/data/79879/000007987917000059/0000079879-17-000059-index.htm',
'name': 'PPG Industries'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1489393/000119312517187579/0001193125-17-187579-index.htm',
'name': 'LyondellBasell Industries'},
{'href': 'https://www.sec.gov/Archives/edgar/data/884905/000088490517000046/0000884905-17-000046-index.htm',
'name': 'Praxair'},
{'href': 'https://www.sec.gov/Archives/edgar/data/915913/000119312517176720/0001193125-17-176720-index.htm',
'name': 'Albemarle'},
{'href': 'https://www.sec.gov/Archives/edgar/data/804328/000123445217000106/0001234452-17-000106-index.htm',
'name': 'Qualcomm'},
{'href': 'https://www.sec.gov/Archives/edgar/data/202058/000020205817000027/0000202058-17-000027-index.htm',
'name': 'Harris Corporation\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/924613/000094787117000411/0000947871-17-000411-index.htm',
'name': 'Nokia'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1043604/000119312517189283/0001193125-17-189283-index.htm',
'name': 'Juniper Networks'},
{'href': 'https://www.sec.gov/Archives/edgar/data/68505/000119312517183817/0001193125-17-183817-index.htm',
'name': 'Motorola Solutions'},
{'href': 'https://www.sec.gov/Archives/edgar/data/858877/000119312517183773/0001193125-17-183773-index.htm',
'name': 'Cisco Systems'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1327567/000132756717000010/0001327567-17-000010-index.htm',
'name': 'Palo Alto Networks'},
{'href': 'https://www.sec.gov/Archives/edgar/data/717826/000119312517178692/0001193125-17-178692-index.htm',
'name': 'LM Ericsson Telephone'},
{'href': 'https://www.sec.gov/Archives/edgar/data/732712/000119312517188465/0001193125-17-188465-index.htm',
'name': 'Verizon Communications'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1117795/000119312517183194/0001193125-17-183194-index.htm',
'name': 'China Mobile'},
{'href': 'https://www.sec.gov/Archives/edgar/data/839923/000110465917035861/0001104659-17-035861-index.htm',
'name': 'Vodafone Group'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1282266/000128226617000034/0001282266-17-000034-index.htm',
'name': 'Windstream Holdings'},
{'href': 'https://www.sec.gov/Archives/edgar/data/756620/000119312517182761/0001193125-17-182761-index.htm',
'name': 'BT Group'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1166141/000119312517184453/0001193125-17-184453-index.htm',
'name': 'NTT DOCOMO'},
{'href': 'https://www.sec.gov/Archives/edgar/data/769594/000119312517188269/0001193125-17-188269-index.htm',
'name': 'Nippon Telegraph'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1289308/000128930817000023/0001289308-17-000023-index.htm',
'name': 'EnerSys'},
{'href': 'https://www.sec.gov/Archives/edgar/data/320193/000119312517159397/0001193125-17-159397-index.htm',
'name': 'Apple'},
{'href': 'https://www.sec.gov/Archives/edgar/data/47217/000004721717000016/0000047217-17-000016-index.htm',
'name': 'HP'},
{'href': 'https://www.sec.gov/Archives/edgar/data/313838/000090342317000401/0000903423-17-000401-index.htm',
'name': 'Sony'},
{'href': 'https://www.sec.gov/Archives/edgar/data/106040/000119312517189443/0001193125-17-189443-index.htm',
'name': 'Western Digital'},
{'href': 'https://www.sec.gov/Archives/edgar/data/57083/000119312517186201/0001193125-17-186201-index.htm',
'name': 'Kyocera'},
{'href': 'https://www.sec.gov/Archives/edgar/data/866374/000110465917034301/0001104659-17-034301-index.htm',
'name': 'Flex'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1137789/000119312517185523/0001193125-17-185523-index.htm',
'name': 'Seagate Technology'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1385157/000110465917036205/0001104659-17-036205-index.htm',
'name': 'TE Connectivity'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1290109/000119312517188206/0001193125-17-188206-index.htm',
'name': 'LG Display'},
{'href': 'https://www.sec.gov/Archives/edgar/data/820313/000110465917036525/0001104659-17-036525-index.htm',
'name': 'Amphenol'},
{'href': 'https://www.sec.gov/Archives/edgar/data/864749/000119312517189225/0001193125-17-189225-index.htm',
'name': 'Trimble\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1121788/000161577417002818/0001615774-17-002818-index.htm',
'name': 'Garmin'},
{'href': 'https://www.sec.gov/Archives/edgar/data/913142/000091314217000020/0000913142-17-000020-index.htm',
'name': 'Belden'},
{'href': 'https://www.sec.gov/Archives/edgar/data/24741/000002474117000031/0000024741-17-000031-index.htm',
'name': 'Corning'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1144215/000114421517000056/0001144215-17-000056-index.htm',
'name': 'Acuity Brands'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1477294/000147729417000064/0001477294-17-000064-index.htm',
'name': 'Sensata Technologies'},
{'href': 'https://www.sec.gov/Archives/edgar/data/8858/000155837017004534/0001558370-17-004534-index.htm',
'name': 'Avnet'},
{'href': 'https://www.sec.gov/Archives/edgar/data/814453/000081445317000063/0000814453-17-000063-index.htm',
'name': 'Newell Brands'},
{'href': 'https://www.sec.gov/Archives/edgar/data/21665/000093041317002256/0000930413-17-002256-index.htm',
'name': 'Colgate-Palmolive'},
{'href': 'https://www.sec.gov/Archives/edgar/data/80424/000008042417000032/0000080424-17-000032-index.htm',
'name': 'Procter & Gamble'},
{'href': 'https://www.sec.gov/Archives/edgar/data/55785/000005578517000039/0000055785-17-000039-index.htm',
'name': 'Kimberly-Clark'},
{'href': 'https://www.sec.gov/Archives/edgar/data/217410/000165495417005208/0001654954-17-005208-index.htm',
'name': 'Unilever'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1001250/000110465917036282/0001104659-17-036282-index.htm',
'name': 'The Estee Lauder'},
{'href': 'https://www.sec.gov/Archives/edgar/data/200406/000020040617000027/0000200406-17-000027-index.htm',
'name': 'Johnson & Johnson'},
{'href': 'https://www.sec.gov/Archives/edgar/data/310158/000031015817000023/0000310158-17-000023-index.htm',
'name': 'Merck & Co'},
{'href': 'https://www.sec.gov/Archives/edgar/data/78003/000119312517189371/0001193125-17-189371-index.htm',
'name': 'Pfizer'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1114448/000110465917036483/0001104659-17-036483-index.htm',
'name': 'Novartis'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1121404/000119312517168297/0001193125-17-168297-index.htm',
'name': 'Sanofi'},
{'href': 'https://www.sec.gov/Archives/edgar/data/18230/000001823017000164/0000018230-17-000164-index.htm',
'name': 'Caterpillar'},
{'href': 'https://www.sec.gov/Archives/edgar/data/792987/000079298717000021/0000792987-17-000021-index.htm',
'name': 'Astec'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1567094/000119312517188830/0001193125-17-188830-index.htm',
'name': 'CNH Industrial'},
{'href': 'https://www.sec.gov/Archives/edgar/data/315189/000110465917035226/0001104659-17-035226-index.htm',
'name': 'Deere'},
{'href': 'https://www.sec.gov/Archives/edgar/data/880266/000088026617000020/0000880266-17-000020-index.htm',
'name': 'AGCO'},
{'href': 'https://www.sec.gov/Archives/edgar/data/97216/000009721617000155/0000097216-17-000155-index.htm',
'name': 'Terex'},
{'href': 'https://www.sec.gov/Archives/edgar/data/313216/000119312517184800/0001193125-17-184800-index.htm',
'name': 'Royal Philips'},
{'href': 'https://www.sec.gov/Archives/edgar/data/40545/000004054517000031/0000040545-17-000031-index.htm',
'name': 'General Electric'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1091587/000110465917036172/0001104659-17-036172-index.htm',
'name': 'ABB'},
{'href': 'https://www.sec.gov/Archives/edgar/data/16988/000119312517186208/0001193125-17-186208-index.htm',
'name': 'Canon'},
{'href': 'https://www.sec.gov/Archives/edgar/data/49826/000004982617000113/0000049826-17-000113-index.htm',
'name': 'Illinois Tool Works'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1024478/000119312517188426/0001193125-17-188426-index.htm',
'name': 'Rockwell Automation'},
{'href': 'https://www.sec.gov/Archives/edgar/data/66740/000110465917036452/0001104659-17-036452-index.htm',
'name': '3M'},
{'href': 'https://www.sec.gov/Archives/edgar/data/93556/000009355617000014/0000093556-17-000014-index.htm',
'name': 'Stanley Black & Decker'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1551182/000155118217000141/0001551182-17-000141-index.htm',
'name': 'Eaton'},
{'href': 'https://www.sec.gov/Archives/edgar/data/882835/000119312517186593/0001193125-17-186593-index.htm',
'name': 'Roper Technologies'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1466258/000146625817000150/0001466258-17-000150-index.htm',
'name': 'Ingersoll-Rand'},
{'href': 'https://www.sec.gov/Archives/edgar/data/26172/000089706917000337/0000897069-17-000337-index.htm',
'name': 'Cummins'},
{'href': 'https://www.sec.gov/Archives/edgar/data/29905/000002990517000027/0000029905-17-000027-index.htm',
'name': 'Dover'},
{'href': 'https://www.sec.gov/Archives/edgar/data/32604/000095013817000449/0000950138-17-000449-index.htm',
'name': 'Emerson Electric'},
{'href': 'https://www.sec.gov/Archives/edgar/data/313616/000119312517186381/0001193125-17-186381-index.htm',
'name': 'Danaher'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1037868/000119312517186958/0001193125-17-186958-index.htm',
'name': 'AMETEK'},
{'href': 'https://www.sec.gov/Archives/edgar/data/773840/000093041317002251/0000930413-17-002251-index.htm',
'name': 'Honeywell International'},
{'href': 'https://www.sec.gov/Archives/edgar/data/76334/000119312517188395/0001193125-17-188395-index.htm',
'name': 'Parker-Hannifin'},
{'href': 'https://www.sec.gov/Archives/edgar/data/8818/000110465917035682/0001104659-17-035682-index.htm',
'name': 'Avery Dennison'},
{'href': 'https://www.sec.gov/Archives/edgar/data/82166/000008216617000063/0000082166-17-000063-index.htm',
'name': 'Raven Industries'},
{'href': 'https://www.sec.gov/Archives/edgar/data/103379/000119312517185479/0001193125-17-185479-index.htm',
'name': 'V F'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1359841/000135984117000072/0001359841-17-000072-index.htm',
'name': 'Hanesbrands'},
{'href': 'https://www.sec.gov/Archives/edgar/data/320187/000032018717000051/0000320187-17-000051-index.htm',
'name': 'Nike'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1530721/000119312517189324/0001193125-17-189324-index.htm',
'name': 'Michael Kors Holdings'},
{'href': 'https://www.sec.gov/Archives/edgar/data/106640/000010664017000045/0000106640-17-000045-index.htm',
'name': 'Whirlpool'},
{'href': 'https://www.sec.gov/Archives/edgar/data/58492/000119312517185045/0001193125-17-185045-index.htm',
'name': 'Leggett & Platt'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1336917/000133691717000031/0001336917-17-000031-index.htm',
'name': 'Under Armour'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1037038/000119312517183841/0001193125-17-183841-index.htm',
'name': 'Ralph Lauren'},
{'href': 'https://www.sec.gov/Archives/edgar/data/851968/000085196817000059/0000851968-17-000059-index.htm',
'name': 'Mohawk Industries'},
{'href': 'https://www.sec.gov/Archives/edgar/data/310764/000031076417000119/0000310764-17-000119-index.htm',
'name': 'Stryker'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1800/000110465917036399/0001104659-17-036399-index.htm',
'name': 'Abbott Laboratories'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1613103/000161310317000010/0001613103-17-000010-index.htm',
'name': 'Medtronic'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1035267/000103526717000087/0001035267-17-000087-index.htm',
'name': 'Intuitive Surgical'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1099800/000109980017000020/0001099800-17-000020-index.htm',
'name': 'Edwards Lifesciences'},
{'href': 'https://www.sec.gov/Archives/edgar/data/885725/000110465917032804/0001104659-17-032804-index.htm',
'name': 'Boston Scientific'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1136869/000119312517188251/0001193125-17-188251-index.htm',
'name': 'Zimmer Biomet Holdings'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1002242/000117494717000948/0001174947-17-000948-index.htm',
'name': 'Eni'},
{'href': 'https://www.sec.gov/Archives/edgar/data/879764/000119312517188505/0001193125-17-188505-index.htm',
'name': 'Total'},
{'href': 'https://www.sec.gov/Archives/edgar/data/34088/000003408817000029/0000034088-17-000029-index.htm',
'name': 'Exxon Mobil'},
{'href': 'https://www.sec.gov/Archives/edgar/data/49938/000119312517181513/0001193125-17-181513-index.htm',
'name': 'Imperial Oil'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1306965/000119312517157168/0001193125-17-157168-index.htm',
'name': 'Royal Dutch Shell'},
{'href': 'https://www.sec.gov/Archives/edgar/data/93410/000009341017000021/0000093410-17-000021-index.htm',
'name': 'Chevron'},
{'href': 'https://www.sec.gov/Archives/edgar/data/808362/000095010317005133/0000950103-17-005133-index.htm',
'name': 'Baker Hughes'},
{'href': 'https://www.sec.gov/Archives/edgar/data/45012/000004501217000112/0000045012-17-000112-index.htm',
'name': 'Halliburton'},
{'href': 'https://www.sec.gov/Archives/edgar/data/87347/000119312517179029/0001193125-17-179029-index.htm',
'name': 'Schlumberger'},
{'href': 'https://www.sec.gov/Archives/edgar/data/73756/000007375617000040/0000073756-17-000040-index.htm',
'name': 'Oceaneering International'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1021860/000119312517188869/0001193125-17-188869-index.htm',
'name': 'National Oilwell Varco'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1000229/000100022917000070/0001000229-17-000070-index.htm',
'name': 'Core Laboratories'},
{'href': 'https://www.sec.gov/Archives/edgar/data/42582/000119312517188603/0001193125-17-188603-index.htm',
'name': 'Goodyear Tire & Rubber'},
{'href': 'https://www.sec.gov/Archives/edgar/data/730263/000114420417030135/0001144204-17-030135-index.htm',
'name': 'Thor Industries'},
{'href': 'https://www.sec.gov/Archives/edgar/data/793952/000079395217000026/0000793952-17-000026-index.htm',
'name': 'Harley-Davidson'},
{'href': 'https://www.sec.gov/Archives/edgar/data/931015/000162828017006080/0001628280-17-006080-index.htm',
'name': 'Polaris Industries'},
{'href': 'https://www.sec.gov/Archives/edgar/data/790051/000162828017005881/0001628280-17-005881-index.htm',
'name': 'Carlisle Companies'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1563411/000119312517187680/0001193125-17-187680-index.htm',
'name': 'Constellium'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1001039/000100103917000098/0001001039-17-000098-index.htm',
'name': 'Walt Disney'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1413329/000141332917000032/0001413329-17-000032-index.htm',
'name': 'Philip Morris\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1668717/000119312517184448/0001193125-17-184448-index.htm',
'name': 'Anheuser-Busch InBev'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1067983/000119312517188762/0001193125-17-188762-index.htm',
'name': 'Berkshire Hathaway'},
{'href': 'https://www.sec.gov/Archives/edgar/data/353278/000095010317005127/0000950103-17-005127-index.htm',
'name': 'Novo-Nordisk'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1034563/000081376217000019/0000813762-17-000019-index.htm',
'name': 'Icahn Enterprises'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1652044/000165204417000017/0001652044-17-000017-index.htm',
'name': 'Alphabet'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1012100/000119312517185019/0001193125-17-185019-index.htm',
'name': 'Sealed Air'},
{'href': 'https://www.sec.gov/Archives/edgar/data/9389/000000938917000020/0000009389-17-000020-index.htm',
'name': 'Ball'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1636023/000117184317003392/0001171843-17-003392-index.htm',
'name': 'WestRock'},
{'href': 'https://www.sec.gov/Archives/edgar/data/91767/000009176717000029/0000091767-17-000029-index.htm',
'name': 'Sonoco Products'},
{'href': 'https://www.sec.gov/Archives/edgar/data/896622/000110465917036509/0001104659-17-036509-index.htm',
'name': 'AptarGroup'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1408075/000140807517000023/0001408075-17-000023-index.htm',
'name': 'Graphic Packaging Holding'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1219601/000119312517189538/0001193125-17-189538-index.htm',
'name': 'Crown Holdings'},
{'href': 'https://www.sec.gov/Archives/edgar/data/886158/000117184317003400/0001171843-17-003400-index.htm',
'name': 'Bed Bath & Beyond'},
{'href': 'https://www.sec.gov/Archives/edgar/data/60667/000006066717000084/0000060667-17-000084-index.htm',
'name': "Lowe's Companies"},
{'href': 'https://www.sec.gov/Archives/edgar/data/98246/000009824617000131/0000098246-17-000131-index.htm',
'name': 'Tiffany'},
{'href': 'https://www.sec.gov/Archives/edgar/data/354950/000035495017000018/0000354950-17-000018-index.htm',
'name': 'Home Depot'},
{'href': 'https://www.sec.gov/Archives/edgar/data/109198/000119312517189493/0001193125-17-189493-index.htm',
'name': 'TJX Companies'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1116132/000115752317001705/0001157523-17-001705-index.htm',
'name': 'Coach'},
{'href': 'https://www.sec.gov/Archives/edgar/data/39911/000003991117000086/0000039911-17-000086-index.htm',
'name': 'Gap'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1018724/000101872417000088/0001018724-17-000088-index.htm',
'name': 'Amazon.com'},
{'href': 'https://www.sec.gov/Archives/edgar/data/701985/000070198517000031/0000701985-17-000031-index.htm',
'name': 'L Brands'},
{'href': 'https://www.sec.gov/Archives/edgar/data/857471/000110465917031922/0001104659-17-031922-index.htm',
'name': 'Luxottica Group'},
{'href': 'https://www.sec.gov/Archives/edgar/data/745732/000074573217000023/0000745732-17-000023-index.htm',
'name': 'Ross Stores'},
{'href': 'https://www.sec.gov/Archives/edgar/data/50863/000119312517174575/0001193125-17-174575-index.htm',
'name': 'Intel'},
{'href': 'https://www.sec.gov/Archives/edgar/data/723125/000072312517000065/0000723125-17-000065-index.htm',
'name': 'Micron Technology'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1413447/000119312517188186/0001193125-17-188186-index.htm',
'name': 'NXP Semiconductors'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1046179/000119312517180408/0001193125-17-180408-index.htm',
'name': 'Taiwan Semiconductor'},
{'href': 'https://www.sec.gov/Archives/edgar/data/97476/000156459017011922/0001564590-17-011922-index.htm',
'name': 'Texas Instruments'},
{'href': 'https://www.sec.gov/Archives/edgar/data/791915/000079191517000032/0000791915-17-000032-index.htm',
'name': 'Cypress'},
{'href': 'https://www.sec.gov/Archives/edgar/data/937966/000134100417000337/0001341004-17-000337-index.htm',
'name': 'ASML Holding'},
{'href': 'https://www.sec.gov/Archives/edgar/data/6951/000119312517184979/0001193125-17-184979-index.htm',
'name': 'Applied Materials'},
{'href': 'https://www.sec.gov/Archives/edgar/data/867773/000086777317000043/0000867773-17-000043-index.htm',
'name': 'SunPower\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1375877/000110465917036384/0001104659-17-036384-index.htm',
'name': 'Canadian Solar\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1419612/000117891317001609/0001178913-17-001609-index.htm',
'name': 'SolarEdge Technologies, Inc.\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1274494/000127449417000028/0001274494-17-000028-index.htm',
'name': 'First Solar\xa0'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1243429/000090342317000372/0000903423-17-000372-index.htm',
'name': 'ArcelorMittal'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1190723/000119312517172833/0001193125-17-172833-index.htm',
'name': 'Tenaris'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1022671/000110465917035567/0001104659-17-035567-index.htm',
'name': 'Steel Dynamics'},
{'href': 'https://www.sec.gov/Archives/edgar/data/861884/000110465917037301/0001104659-17-037301-index.htm',
'name': 'Reliance Steel & Aluminum'},
{'href': 'https://www.sec.gov/Archives/edgar/data/73309/000119312517188620/0001193125-17-188620-index.htm',
'name': 'Nucor'},
{'href': 'https://www.sec.gov/Archives/edgar/data/889132/000119312517188189/0001193125-17-188189-index.htm',
'name': 'POSCO'},
{'href': 'https://www.sec.gov/Archives/edgar/data/46080/000004608017000075/0000046080-17-000075-index.htm',
'name': 'Hasbro'},
{'href': 'https://www.sec.gov/Archives/edgar/data/63276/000119312517188190/0001193125-17-188190-index.htm',
'name': 'Mattel'},
{'href': 'https://www.sec.gov/Archives/edgar/data/14930/000001493017000091/0000014930-17-000091-index.htm',
'name': 'Brunswick'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1616318/000161631817000104/0001616318-17-000104-index.htm',
'name': 'Vista Outdoor'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1564902/000156459017011941/0001564590-17-011941-index.htm',
'name': 'SeaWorld Entertainment'},
{'href': 'https://www.sec.gov/Archives/edgar/data/1078207/000107820717000027/0001078207-17-000027-index.htm',
'name': 'Nautilus'},
{'href': 'https://www.sec.gov/Archives/edgar/data/837465/000083746517000006/0000837465-17-000006-index.htm',
'name': 'Callaway Golf'},
{'href': 'https://www.sec.gov/Archives/edgar/data/945841/000094584117000104/0000945841-17-000104-index.htm',
'name': 'Pool'}]
In [25]:
# Fetch a single filing index page to find out where the CIK appears in it.
response = requests.get('https://www.sec.gov/Archives/edgar/data/796343/000079634317000117/0000796343-17-000117-index.htm')
# Stop on a 4xx/5xx answer: an error page has no "companyInfo" element, and
# parsing it would only surface later as an AttributeError on None.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
# Text of the first link inside the element with class "companyInfo".
soup.find(class_='companyInfo').find('a').text
Out[25]:
'0000796343 (see all company filings)'
In [30]:
# Take the first run of digits from the link text shown above.
cik = re.search(r'\d+', '0000796343 (see all company filings)').group()
cik
Out[30]:
'0000796343'
In [31]:
def _wait_random_time():
# Average of 0.5s wait
sleep(randint(0, 50)/100)
In [45]:
# Visit every company's filing index page and record the CIK found there.
# Result: `ciks`, a list of {'cik': <digit string>, 'name': <link text>} dicts.
# NOTE(review): requests are issued back-to-back; `_wait_random_time` is
# defined in this notebook but is not called here.
ciks = []
for company in companies:
    name = company.get('name')
    print(name)  # progress indicator, one line per company
    href = company.get('href')
    if not href:
        # Link without a target: there is no page to fetch.
        continue
    response = requests.get(href)
    # Fail on a 4xx/5xx answer instead of parsing an error page, which would
    # otherwise show up as an AttributeError on the missing "companyInfo" element.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    # First link inside the "companyInfo" element, e.g.
    # '0000796343 (see all company filings)'.
    cik_field = soup.find(class_='companyInfo').find('a').text
    # The first run of digits in that text is the CIK.
    cik = re.findall(r'\d+', cik_field)[0]
    ciks.append(dict(cik=cik, name=name))
Boeing
Lockheed Martin
United Technologies
Raytheon
Northrop Grumman
Rockwell Collins, Inc.
General Dynamics
Sturm Ruger & Co
American Outdoor Brands
Microsoft
Hewlett Packard
IBM
Intuit
F5 Networks
Fortinet
Cadence Design Systems
Symantec
Autodesk
Oracle
Adobe Systems
Ford Motor
Tesla
General Motors
Honda Motor
Toyota Motor
Tata Motors
Johnson Controls
Delphi Automotive
Autoliv
Magna International
Lear
BorgWarner
Masco
MDU Resources Group
CRH
Owens Corning
James Hardie Industries
USG
First Data
Cintas
DST Systems
Nielsen Holdings
Booz Allen Hamilton
Alliance Data Systems
Ecolab
Sherwin-Williams
PPG Industries
LyondellBasell Industries
Praxair
Albemarle
Qualcomm
Harris Corporation
Nokia
Juniper Networks
Motorola Solutions
Cisco Systems
Palo Alto Networks
LM Ericsson Telephone
Verizon Communications
China Mobile
Vodafone Group
Windstream Holdings
BT Group
NTT DOCOMO
Nippon Telegraph
EnerSys
Apple
HP
Sony
Western Digital
Kyocera
Flex
Seagate Technology
TE Connectivity
LG Display
Amphenol
Trimble
Garmin
Belden
Corning
Acuity Brands
Sensata Technologies
Avnet
Newell Brands
Colgate-Palmolive
Procter & Gamble
Kimberly-Clark
Unilever
The Estee Lauder
Johnson & Johnson
Merck & Co
Pfizer
Novartis
Sanofi
Caterpillar
Astec
CNH Industrial
Deere
AGCO
Terex
Royal Philips
General Electric
ABB
Canon
Illinois Tool Works
Rockwell Automation
3M
Stanley Black & Decker
Eaton
Roper Technologies
Ingersoll-Rand
Cummins
Dover
Emerson Electric
Danaher
AMETEK
Honeywell International
Parker-Hannifin
Avery Dennison
Raven Industries
V F
Hanesbrands
Nike
Michael Kors Holdings
Whirlpool
Leggett & Platt
Under Armour
Ralph Lauren
Mohawk Industries
Stryker
Abbott Laboratories
Medtronic
Intuitive Surgical
Edwards Lifesciences
Boston Scientific
Zimmer Biomet Holdings
Eni
Total
Exxon Mobil
Imperial Oil
Royal Dutch Shell
Chevron
Baker Hughes
Halliburton
Schlumberger
Oceaneering International
National Oilwell Varco
Core Laboratories
Goodyear Tire & Rubber
Thor Industries
Harley-Davidson
Polaris Industries
Carlisle Companies
Constellium
Walt Disney
Philip Morris
Anheuser-Busch InBev
Berkshire Hathaway
Novo-Nordisk
Icahn Enterprises
Alphabet
Sealed Air
Ball
WestRock
Sonoco Products
AptarGroup
Graphic Packaging Holding
Crown Holdings
Bed Bath & Beyond
Lowe's Companies
Tiffany
Home Depot
TJX Companies
Coach
Gap
Amazon.com
L Brands
Luxottica Group
Ross Stores
Intel
Micron Technology
NXP Semiconductors
Taiwan Semiconductor
Texas Instruments
Cypress
ASML Holding
Applied Materials
SunPower
Canadian Solar
SolarEdge Technologies, Inc.
First Solar
ArcelorMittal
Tenaris
Steel Dynamics
Reliance Steel & Aluminum
Nucor
POSCO
Hasbro
Mattel
Brunswick
Vista Outdoor
SeaWorld Entertainment
Nautilus
Callaway Golf
Pool
In [46]:
# Trim trailing whitespace from each scraped name (updates `ciks` in place).
for record in ciks:
    record['name'] = record['name'].rstrip()

# Reduced copy holding only the CIK of every record that has a non-empty one.
ciks_only = [dict(cik=record['cik']) for record in ciks if record.get('cik')]
In [47]:
# Persist the full name + CIK records as JSON.
with open('data/companies_from_spreadsheet.json', 'w') as out_file:
    json.dump(ciks, out_file)
In [48]:
# Persist the CIK-only records as JSON.
with open('data/companies_from_spreadsheet_ciks_only.json', 'w') as out_file:
    json.dump(ciks_only, out_file)
In [3]:
# Reload the CIK-only records from disk.
# Note that `ciks` now refers to this list of {'cik': ...} dicts.
with open('data/companies_from_spreadsheet_ciks_only.json') as in_file:
    ciks = json.load(in_file)
In [4]:
# Collect the CIKs for which no EdgarCompanyInfo record can be fetched.
# NOTE(review): EdgarCompanyInfo is not imported in the cells visible here;
# presumably the kernel environment provides it -- confirm.
new_ciks = []
for item in ciks:
    cik = item.get('cik')
    try:
        # The fetched object is discarded; only success or failure matters.
        EdgarCompanyInfo.objects.get(cik=cik)
    except EdgarCompanyInfo.DoesNotExist:
        # No record for this CIK: queue it with a placeholder name.
        new_ciks.append({
            'cik': cik,
            'conformed_name': "unknown",
        })
In [5]:
# Number of entries collected in new_ciks.
len(new_ciks)
Out[5]:
96
In [6]:
# Total number of entries in ciks.
len(ciks)
Out[6]:
205
In [7]:
# Serialise the list of newly found CIKs to disk as JSON.
with open('data/companies_from_spreadsheet_ciks_new_only.json', 'w') as out_file:
    json.dump(new_ciks, out_file)
In [8]:
# Display the full new_ciks list.
new_ciks
Out[8]:
[{'cik': '0001137411', 'conformed_name': 'unknown'},
{'cik': '0001645590', 'conformed_name': 'unknown'},
{'cik': '0000896878', 'conformed_name': 'unknown'},
{'cik': '0001262039', 'conformed_name': 'unknown'},
{'cik': '0000813672', 'conformed_name': 'unknown'},
{'cik': '0000849399', 'conformed_name': 'unknown'},
{'cik': '0000769397', 'conformed_name': 'unknown'},
{'cik': '0001341439', 'conformed_name': 'unknown'},
{'cik': '0000796343', 'conformed_name': 'unknown'},
{'cik': '0000715153', 'conformed_name': 'unknown'},
{'cik': '0000926042', 'conformed_name': 'unknown'},
{'cik': '0000833444', 'conformed_name': 'unknown'},
{'cik': '0000749098', 'conformed_name': 'unknown'},
{'cik': '0000062996', 'conformed_name': 'unknown'},
{'cik': '0000067716', 'conformed_name': 'unknown'},
{'cik': '0000849395', 'conformed_name': 'unknown'},
{'cik': '0001370946', 'conformed_name': 'unknown'},
{'cik': '0001159152', 'conformed_name': 'unknown'},
{'cik': '0000757011', 'conformed_name': 'unknown'},
{'cik': '0000883980', 'conformed_name': 'unknown'},
{'cik': '0000723254', 'conformed_name': 'unknown'},
{'cik': '0000714603', 'conformed_name': 'unknown'},
{'cik': '0001492633', 'conformed_name': 'unknown'},
{'cik': '0001443646', 'conformed_name': 'unknown'},
{'cik': '0001101215', 'conformed_name': 'unknown'},
{'cik': '0000031462', 'conformed_name': 'unknown'},
{'cik': '0000089800', 'conformed_name': 'unknown'},
{'cik': '0000079879', 'conformed_name': 'unknown'},
{'cik': '0001489393', 'conformed_name': 'unknown'},
{'cik': '0000884905', 'conformed_name': 'unknown'},
{'cik': '0000915913', 'conformed_name': 'unknown'},
{'cik': '0000202058', 'conformed_name': 'unknown'},
{'cik': '0000924613', 'conformed_name': 'unknown'},
{'cik': '0000717826', 'conformed_name': 'unknown'},
{'cik': '0001117795', 'conformed_name': 'unknown'},
{'cik': '0000839923', 'conformed_name': 'unknown'},
{'cik': '0001282266', 'conformed_name': 'unknown'},
{'cik': '0000756620', 'conformed_name': 'unknown'},
{'cik': '0001166141', 'conformed_name': 'unknown'},
{'cik': '0000769594', 'conformed_name': 'unknown'},
{'cik': '0001289308', 'conformed_name': 'unknown'},
{'cik': '0000313838', 'conformed_name': 'unknown'},
{'cik': '0000057083', 'conformed_name': 'unknown'},
{'cik': '0001290109', 'conformed_name': 'unknown'},
{'cik': '0000913142', 'conformed_name': 'unknown'},
{'cik': '0000021665', 'conformed_name': 'unknown'},
{'cik': '0000055785', 'conformed_name': 'unknown'},
{'cik': '0000217410', 'conformed_name': 'unknown'},
{'cik': '0001001250', 'conformed_name': 'unknown'},
{'cik': '0000310158', 'conformed_name': 'unknown'},
{'cik': '0000078003', 'conformed_name': 'unknown'},
{'cik': '0001114448', 'conformed_name': 'unknown'},
{'cik': '0001121404', 'conformed_name': 'unknown'},
{'cik': '0000792987', 'conformed_name': 'unknown'},
{'cik': '0001567094', 'conformed_name': 'unknown'},
{'cik': '0000880266', 'conformed_name': 'unknown'},
{'cik': '0000097216', 'conformed_name': 'unknown'},
{'cik': '0000313216', 'conformed_name': 'unknown'},
{'cik': '0001091587', 'conformed_name': 'unknown'},
{'cik': '0000016988', 'conformed_name': 'unknown'},
{'cik': '0000066740', 'conformed_name': 'unknown'},
{'cik': '0000029905', 'conformed_name': 'unknown'},
{'cik': '0000001800', 'conformed_name': 'unknown'},
{'cik': '0001035267', 'conformed_name': 'unknown'},
{'cik': '0001099800', 'conformed_name': 'unknown'},
{'cik': '0001136869', 'conformed_name': 'unknown'},
{'cik': '0001002242', 'conformed_name': 'unknown'},
{'cik': '0000879764', 'conformed_name': 'unknown'},
{'cik': '0000049938', 'conformed_name': 'unknown'},
{'cik': '0001306965', 'conformed_name': 'unknown'},
{'cik': '0000073756', 'conformed_name': 'unknown'},
{'cik': '0001000229', 'conformed_name': 'unknown'},
{'cik': '0001563411', 'conformed_name': 'unknown'},
{'cik': '0001668717', 'conformed_name': 'unknown'},
{'cik': '0001067983', 'conformed_name': 'unknown'},
{'cik': '0000353278', 'conformed_name': 'unknown'},
{'cik': '0001652044', 'conformed_name': 'unknown'},
{'cik': '0001636023', 'conformed_name': 'unknown'},
{'cik': '0000896622', 'conformed_name': 'unknown'},
{'cik': '0000701985', 'conformed_name': 'unknown'},
{'cik': '0001046179', 'conformed_name': 'unknown'},
{'cik': '0000791915', 'conformed_name': 'unknown'},
{'cik': '0000006951', 'conformed_name': 'unknown'},
{'cik': '0000867773', 'conformed_name': 'unknown'},
{'cik': '0001375877', 'conformed_name': 'unknown'},
{'cik': '0001419612', 'conformed_name': 'unknown'},
{'cik': '0001274494', 'conformed_name': 'unknown'},
{'cik': '0001243429', 'conformed_name': 'unknown'},
{'cik': '0001190723', 'conformed_name': 'unknown'},
{'cik': '0001022671', 'conformed_name': 'unknown'},
{'cik': '0000861884', 'conformed_name': 'unknown'},
{'cik': '0000073309', 'conformed_name': 'unknown'},
{'cik': '0000889132', 'conformed_name': 'unknown'},
{'cik': '0001616318', 'conformed_name': 'unknown'},
{'cik': '0001564902', 'conformed_name': 'unknown'},
{'cik': '0000945841', 'conformed_name': 'unknown'}]
In [ ]:
Content source: MiningTheDisclosures/conflict-minerals-data
Similar notebooks: